import requests
from bs4 import BeautifulSoup
import os
import csv
import time


def cut_quots(text):
    """Return *text* with curly double-quote characters (“ and ”) removed."""
    # One C-level pass instead of two chained str.replace() calls.
    return text.translate(str.maketrans('', '', '“”'))


def clean_since(text):
    """Extract the third space-separated token from strings like "Traxer since 2009".

    Returns "" when *text* has fewer than three space-separated parts —
    in particular for the empty string the caller passes when the
    "since" field is missing (the original version raised IndexError).
    """
    parts = text.split(" ")
    return parts[2] if len(parts) > 2 else ""


def get_html(url):
    """GET *url* and return the page text, or the HTTP status code on failure.

    Returns:
        str: response body when the request succeeded (response.ok),
        int: the HTTP status code otherwise — callers must type-check.
    """
    # Bug fix: the header *value* previously began with the literal text
    # "user-agent: ", so the header actually sent was malformed.
    agent = ('Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
             '(KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36')
    response = requests.get(url, headers={"User-Agent": agent})
    if response.ok:  # ok == any 2xx/3xx success status
        return response.text
    return response.status_code


def write_csv(data):
    """Append one testimonial record to testimonials.csv next to this script.

    Fix: the original never emitted a header row; now the header is
    written exactly once — when the file does not exist yet or is empty.

    Args:
        data: dict whose keys match the fieldnames below; extra keys raise
            ValueError (csv.DictWriter default), missing ones write "".
    """
    fieldnames = [
        "category",
        "header",
        "content",
        "avtr",
        "autor",
        "acc_name",
        "since",
        "email",
        "tel",
    ]
    path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "testimonials.csv")
    # Header is needed only on the very first append.
    need_header = not os.path.exists(path) or os.path.getsize(path) == 0
    with open(path, "a", newline='', encoding='utf-8') as file_:
        writer = csv.DictWriter(file_, fieldnames=fieldnames)
        if need_header:
            writer.writeheader()
        writer.writerow(data)


def get_arts(html, container_id='testimonial-2364-3-0-0'):
    """Return all testimonial <article> tags inside the wrapping container div.

    Args:
        html: page source markup.
        container_id: id of the container div; default is the id used on
            the CaterTrax testimonials page (generalized from a hard-coded
            constant so other pages/ids can reuse this function).

    Raises:
        AttributeError: if no div with *container_id* exists (unchanged
            behavior — .find() returns None and .find_all fails on it).
    """
    soup = BeautifulSoup(html, 'lxml')
    container = soup.find('div', id=container_id)
    return container.find_all('article', class_="testimonial-post")


def _first_text(getter):
    """Call *getter* and return its node's stripped text, or "" on a miss.

    BeautifulSoup's find() returns None when nothing matches, so a chained
    lookup fails with AttributeError — the only expected failure mode here
    (narrowed from the original bare ``except:`` clauses, which also
    swallowed KeyboardInterrupt/SystemExit).
    """
    try:
        return getter().text.strip()
    except AttributeError:
        return ""


def get_page_data(html):
    """Extract every testimonial on the page and append each one to the CSV.

    Missing optional fields become "" instead of aborting the record.
    """
    arts = get_arts(html)
    if not arts:
        print("Parsing container is empty")
        return
    for art in arts:
        cont = art.find('div', class_="testimonial-content")
        autr = art.find('div', class_="author-details")

        category = _first_text(
            lambda: cont.find('span', class_="testimonial-category"))
        header = _first_text(lambda: cont.find('h2'))
        content = _first_text(lambda: cont.find('div', class_="entry-content"))
        hide_content = _first_text(
            lambda: cont.find('span', class_="coll-hidden"))
        autor = _first_text(
            lambda: autr.find("p", class_="testimonial-author"))
        acc_name = _first_text(
            lambda: autr.find("p", class_="testimonial-author").find(
                'span', class_="account-name"))
        since = _first_text(lambda: autr.find("p", class_="traxer-since"))
        email = _first_text(
            lambda: autr.find("ul", class_="testimonial-meta").find(
                "li", class_="email").find('a'))
        tel = _first_text(
            lambda: autr.find("ul", class_="testimonial-meta").find(
                "li", class_="tel"))
        # Avatar is an attribute, not text, so it can't go through _first_text.
        try:
            avtr = art.find('figure').find('img').get("src")
        except AttributeError:
            avtr = ""

        data = {
            "category": category,
            "header": cut_quots(header),
            "content": f"{content} {hide_content}",
            "avtr": avtr,
            "autor": autor,
            "acc_name": acc_name,
            # Guard: clean_since indexes the third token and would crash on "".
            "since": clean_since(since) if since else "",
            "email": email,
            "tel": tel,
        }
        print(data)
        write_csv(data)


def main():
    """Walk the paginated testimonial listing until an error or an empty page.

    Bug fix: get_html returns the numeric status code for *any* non-ok
    response, but the original only stopped on 403/404 — a 500 (or any
    other code) leaked an int into get_arts and crashed. Now any
    non-string result is treated as a hard stop.
    """
    page = 1
    while True:
        url = f'https://catertrax.com/why-catertrax/traxers/page/{page}/'
        html = get_html(url)
        if not isinstance(html, str):
            print(f"Error request: {html}")
            break
        arts = get_arts(html)
        if not arts:  # past the last page: container has no articles
            print("Parsing done!")
            break
        get_page_data(html)
        page += 1


if __name__ == '__main__':
    # Time the whole scrape and report the wall-clock duration.
    started = time.time()
    main()
    elapsed = time.time() - started
    print("--- %s seconds ---" % elapsed)
